# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly
from ydata_profiling import ProfileReport
import PyQt5 as qt
from IPython.display import display, Markdown
# Enable inline plotting inside Jupyter.
# pip install PyQt5  # install it if it is not already available
# Script form of the `%matplotlib inline` magic. get_ipython() is not imported
# in this file; it is supplied by the IPython/Jupyter runtime, so this line is
# expected to work only when run inside such a kernel.
get_ipython().run_line_magic('matplotlib', 'inline')
# Query the active matplotlib backend (the return value is not stored).
matplotlib.get_backend()
# NOTE(review): bare string expression with no effect -- it looks like the
# notebook output of the call above that was pasted in; confirm and remove.
'module://matplotlib_inline.backend_inline'
# Load Data
# Read the maintenance workbook into the DataFrame used by the analysis below.
df = pd.read_excel('maintenance_cleaned_extended.xlsx')
# Visualization Functions
## Bar, Scatter, Line charts
def myPlot(data, plotType, title):
    """Plot a Series as a bar, scatter or line chart, sorted by ascending value.

    Args:
        data: Series-like object; after sorting, its index (cast to str)
            supplies the x values and its values supply the y values.
        plotType: One of 'bar', 'scatter' or 'line'.
        title: Chart title; ' Analysis' is appended to it.

    Raises:
        ValueError: If plotType is not one of the supported chart types.
    """
    # Reject unknown chart types before doing any work; otherwise no branch
    # below would assign `fig` and the failure would surface as an
    # UnboundLocalError that says nothing about the real cause.
    if plotType not in ('bar', 'scatter', 'line'):
        raise ValueError(
            "Unsupported plotType %r; expected 'bar', 'scatter' or 'line'" % (plotType,)
        )

    data = data.sort_values(ascending=True)
    xs = data.index.astype(str)
    ys = data.values
    chart_title = title + ' Analysis'

    if plotType == 'bar':
        fig = px.bar(data_frame=data, x=xs, y=ys, color=ys, title=chart_title)
    elif plotType == 'scatter':
        fig = px.scatter(data_frame=data, x=xs, y=ys, color=ys, title=chart_title)
    else:  # 'line' -- drawn without a color dimension
        fig = px.line(data_frame=data, x=xs, y=ys, title=chart_title)

    fig.update_layout(title_x=0.45)
    fig.show()
def myPlot1(data, xs, ys, clr, plotType, title, sort_by=None, ascending=True):
    """Plot two DataFrame columns against each other, optionally colored and sorted.

    Args:
        data: DataFrame holding the columns to plot.
        xs: Name of the column used for the x axis (also used as axis title).
        ys: Name of the column used for the y axis (also used as axis title).
        clr: Name of the column used for coloring; a falsy value disables
            coloring. The column is cast to str before being passed on.
        plotType: One of 'bar', 'scatter' or 'line'.
        title: Chart title; ' Analysis' is appended to it.
        sort_by: Column name(s) to sort by before plotting; None keeps the
            incoming row order.
        ascending: Sort direction used when sort_by is given.

    Raises:
        ValueError: If plotType is not one of the supported chart types.
    """
    # Reject unknown chart types before doing any work; otherwise no branch
    # below would assign `fig` and the failure would surface as an
    # UnboundLocalError that says nothing about the real cause.
    if plotType not in ('bar', 'scatter', 'line'):
        raise ValueError(
            "Unsupported plotType %r; expected 'bar', 'scatter' or 'line'" % (plotType,)
        )

    if sort_by is not None:
        data_sorted = data.sort_values(by=sort_by, ascending=ascending)
    else:
        data_sorted = data

    # Capture the axis labels before the names are rebound to column data.
    x_label = str(xs)
    y_label = str(ys)
    x_values = data_sorted[xs]
    y_values = data_sorted[ys]
    color_values = data_sorted[clr].astype(str) if clr else None
    chart_title = title + ' Analysis'

    if plotType == 'bar':
        fig = px.bar(x=x_values, y=y_values, color=color_values, title=chart_title)
    elif plotType == 'scatter':
        fig = px.scatter(x=x_values, y=y_values, color=color_values, title=chart_title)
    else:  # 'line'
        fig = px.line(x=x_values, y=y_values, color=color_values, title=chart_title)

    fig.update_layout(title_x=0.5, xaxis_title=x_label, yaxis_title=y_label)
    fig.show()
def myPlot2(data, plotType, title):
    """Plot a Series as a bar, scatter or line chart, keeping its existing order.

    Args:
        data: Object exposing `.index` and `.values` (e.g. a Series); the
            index (cast to str) supplies the x values and the values supply
            the y values.
        plotType: One of 'bar', 'scatter' or 'line'.
        title: Chart title; ' Analysis' is appended to it.

    Raises:
        ValueError: If plotType is not one of the supported chart types.
    """
    # Reject unknown chart types before doing any work; otherwise no branch
    # below would assign `fig` and the failure would surface as an
    # UnboundLocalError that says nothing about the real cause.
    if plotType not in ('bar', 'scatter', 'line'):
        raise ValueError(
            "Unsupported plotType %r; expected 'bar', 'scatter' or 'line'" % (plotType,)
        )

    xs = data.index.astype(str)  # index -> x axis
    ys = data.values             # values -> y axis
    chart_title = title + ' Analysis'

    if plotType == 'bar':
        fig = px.bar(x=xs, y=ys, color=ys, title=chart_title)
    elif plotType == 'scatter':
        fig = px.scatter(x=xs, y=ys, color=ys, title=chart_title)
    else:  # 'line' -- drawn without a color dimension
        fig = px.line(x=xs, y=ys, title=chart_title)

    # Center the title.
    fig.update_layout(title_x=0.5)
    fig.show()
def myBoxPlot(data, x, y, color, title):
    """Show a box plot built from `data` with a centered title.

    Args:
        data: Data passed straight to plotly express as the data frame.
        x: Column used for the x axis; its str() form is the x-axis title.
        y: Column used for the y axis; its str() form is the y-axis title.
        color: Column used to color the boxes.
        title: Chart title, used as given.
    """
    box_fig = px.box(data, x=x, y=y, color=color, title=title)
    layout_options = {
        'title_x': 0.5,
        'xaxis_title': str(x),
        'yaxis_title': str(y),
    }
    box_fig.update_layout(**layout_options)
    box_fig.show()
## Sunburst chart
def mySunBurst(data, name, value, title):
    """Show a sunburst chart built from `data`.

    Args:
        data: Data frame handed to plotly express.
        name: Forwarded as the `path` argument, i.e. the hierarchy levels,
            for example ['cost_category', 'damage type'].
        value: Forwarded as the `values` argument (the size of each sector).
        title: Chart title; ' Analysis' is appended to it.
    """
    chart_title = title + ' Analysis'
    sunburst_fig = px.sunburst(
        data_frame=data,
        path=name,
        values=value,
        title=chart_title,
    )
    sunburst_fig.update_layout(title_x=0.45)
    sunburst_fig.show()
## Pie chart
def myPie(data, title_prefix):
    """Show a pie chart whose slices come from the index and values of `data`.

    Args:
        data: Object exposing `.index` (slice names) and `.values` (slice
            sizes), e.g. a Series.
        title_prefix: Text placed between 'Top 5 ' and ' Analysis' in the
            title. The 'Top 5' wording is fixed; the function itself does
            not limit `data` to five entries.
    """
    chart_title = 'Top 5 ' + title_prefix + ' Analysis'
    slice_names = data.index
    slice_sizes = data.values
    pie_fig = px.pie(
        data_frame=data,
        names=slice_names,
        values=slice_sizes,
        title=chart_title,
    )
    pie_fig.update_layout(title_x=0.5)
    pie_fig.show()
## Combine DataFrames
def combine(data, first_field, first_field_count, field_grouped_on, resulting_field_value):
    """Summarize `data` per category: total of one column plus row count.

    Args:
        data: Source DataFrame.
        first_field: Name of the category column to group on.
        first_field_count: Name to give the per-category row-count column.
        field_grouped_on: Name of the column that is summed per category.
        resulting_field_value: Name to give the per-category sum column.

    Returns:
        A tuple (first_field_count, resulting_field_value, merged), where
        merged has the columns first_field, resulting_field_value and
        first_field_count.
    """
    # Per-category totals of the grouped column.
    totals = (
        data.groupby([first_field])[field_grouped_on]
        .sum()
        .reset_index(name=resulting_field_value)
    )

    # Per-category occurrence counts, relabelled to the requested names.
    counts = (
        data[first_field]
        .value_counts()
        .reset_index()
        .set_axis([first_field, first_field_count], axis=1)
    )

    merged = totals.merge(counts, on=first_field)
    return first_field_count, resulting_field_value, merged
## Bivariate Analysis
### Cost Category Insights
#### Distribution of damage types across different cost categories
# Number of records per (damage type, cost category), smallest groups first.
damageType_costCategory = (
    df.groupby(['damage type', 'cost_category'])
    .size()
    .reset_index(name='Count')
    .sort_values(by='Count')
)
myPlot1(
    damageType_costCategory,
    xs='damage type',
    ys='Count',
    clr='cost_category',
    plotType='bar',
    title='Distribution of damage types across different cost categories',
)
#### Relationship between cost categories and car models
# Number of records per (car, cost category), ordered by car.
car_costCategory = (
    df.groupby(['car', 'cost_category'])
    .size()
    .reset_index(name='Count')
    .sort_values(by=['car'])
)
myPlot1(
    car_costCategory,
    xs='car',
    ys='Count',
    clr='cost_category',
    plotType='bar',
    title='Relationship between cost categories and car models',
)
#### Comparison of service duration across different cost categories
myBoxPlot(
    data=df,
    x='cost_category',
    y='service_duration',
    color='cost_category',
    title='Comparison of service duration across different cost categories',
)
#### Cost category breakdown by location
# Number of records per (location, cost category), ordered by location.
location_costCategory = (
    df.groupby(['location', 'cost_category'])
    .size()
    .reset_index(name='Count')
    .sort_values(by=['location'])
)
myPlot1(
    location_costCategory,
    xs='location',
    ys='Count',
    clr='cost_category',
    plotType='bar',
    title='Cost category breakdown by location',
)
#### Trends in cost categories over time
# Label every record with the year and month of its 'date ready' timestamp,
# formatted as 'YYYY_MM'. strftime is used instead of concatenating
# .dt.year and .dt.month: if the column contains even one NaT, both accessors
# return floats and the labels degrade to '2023.0_1.0' / 'nan_nan'. With
# strftime a missing date yields a missing label, and groupby leaves such
# rows out by default.
df['year_month'] = df['date ready'].dt.strftime('%Y_%m')
# Number of records per (month, cost category), in chronological order.
date_costCategory = (
    df.groupby(['year_month', 'cost_category'])
    .size()
    .reset_index(name='Frequency')
    .sort_values(by='year_month')
)
myPlot1(
    date_costCategory,
    'year_month',
    'Frequency',
    'cost_category',
    'line',
    'Trends in cost categories over time',
)